import pandas as pd
import folium
from folium import plugins
from folium.plugins import HeatMap
import numpy as np
import matplotlib.pyplot as plt
We will use this CSV file, which contains all the reviews of businesses located in Toronto.
# Load the review table; the path is relative to the current working directory.
business=pd.read_csv('reviews_rest_Toronto.csv')
# Quick look at the first rows, then at the summary statistics of the table.
business.head()
business.describe()
Let's only consider the restaurants
# Drop the rows without a category string, then keep the rows whose
# categories mention "Restaurants".
business = business.loc[business['categories'].notnull()]
restaurants = business.loc[business['categories'].str.contains('Restaurants')]
restaurants.describe()

# Every row belonging to a business named "Banh Mi Boys".
BanhMiBoys = restaurants.loc[restaurants['name'].eq('Banh Mi Boys')]
BanhMiBoys.describe()
BanhMiBoys.head()
So we have the four restaurants of Banh Mi Boys and the number of reviews they have.
# Number of rows per business_id among the Banh Mi Boys rows.
BanhMiBoys['business_id'].value_counts()
The users we want to target are the users who go to Banh Mi Boys.
# Distinct user_id values found in the Banh Mi Boys rows.
Target_users=BanhMiBoys['user_id'].unique()
We have 1648 target users in Toronto !
# Number of distinct target users.
print(len(Target_users))
Let's create a list of the restaurants with the number of reviews they have
# Build a two-column table: business_id and 'Number of reviews'.
# The count is the number of non-null 'cool' values per business_id, exactly
# as before, but only that one column is aggregated instead of counting every
# column of the frame and discarding all but one afterwards.
List_restaurants = (
    restaurants.groupby('business_id')['cool']
    .count()
    .rename('Number of reviews')
    .reset_index()
)
List_restaurants.head()
We only consider the restaurants with more than five reviews
# Keep the restaurants whose review count is strictly greater than 5.
List_restaurants = List_restaurants.loc[
    List_restaurants['Number of reviews'].gt(5)
]
List_restaurants.describe()
# Renumber the rows 0..n-1 after the filter.
List_restaurants = List_restaurants.reset_index(drop=True)
Now we will determine, for each restaurant, the share of its users who are also target users (i.e. users of Banh Mi Boys).
# For every restaurant, compute
#     (distinct reviewers that are also target users) / (number of reviews).
# Note that the denominator is the review count, not the number of distinct
# reviewers.
List_business_id = List_restaurants['business_id']
List_Number_reviews = List_restaurants['Number of reviews']

# One pass over the reviews instead of one full scan per restaurant:
# keep the rows written by target users, then count distinct users per business.
target_reviews = restaurants[restaurants['user_id'].isin(Target_users)]
shared_users = target_reviews.groupby('business_id')['user_id'].nunique()

# Restaurants without any target user are absent from shared_users -> 0.
# Both operands share List_restaurants' index, so the division is aligned
# row by row whatever the index labels are.
frequency = (
    List_business_id.map(shared_users).fillna(0) / List_Number_reviews
).tolist()

List_restaurants['frequency'] = frequency
List_restaurants.head()
List_restaurants = List_restaurants.sort_values(by=['frequency'], ascending=False)
List_restaurants.head(25)
The first four business IDs are the Banh Mi Boys restaurants.
Let's see who the main competitors of Banh Mi Boys in Toronto are:
# Rows of the first non-Banh-Mi-Boys business taken from the ranking.
competition1 = restaurants.loc[
    restaurants['business_id'].eq('ezFIe-ZDKCl3wXzRjOs-dg')
]
competition1.head()
Interesting
# Two more competitors, selected by business_id.
competition2 = restaurants.loc[
    restaurants['business_id'].eq('y2QENZ0Rre4EOveD0iFpBQ')
]
competition2.head()
competition3 = restaurants.loc[
    restaurants['business_id'].eq('5jtxiHzuaFLoJxevfJNxRQ')
]
competition3.head()

# Competitor chains, selected by name (all rows of every business with that name).
TaiTaiBox = restaurants.loc[restaurants['name'].eq('Tai Tai Box')]
TaiTaiBox.head()
Sammy = restaurants.loc[restaurants['name'].eq("Sammy's Gourmet")]

# Distinct business_id values of the Banh Mi Boys rows.
buid = BanhMiBoys['business_id'].unique()
We will take the location of these restaurants
def _first_coordinates(chain_rows):
    """Return ``(longitudes, latitudes)`` with one entry per distinct business_id.

    For each business_id the coordinates of its first row in ``chain_rows``
    are used; entries follow the order in which the ids first appear.
    An empty frame yields two empty lists.
    """
    branches = chain_rows.drop_duplicates(subset='business_id')
    return branches['longitude'].tolist(), branches['latitude'].tolist()


# The chain-wide frames (BanhMiBoys, TaiTaiBox, Sammy, Come) are only read
# here; they are no longer rebound to a single branch inside a loop.
BanhMiBoys_long, BanhMiBoys_lat = _first_coordinates(BanhMiBoys)

buid2 = TaiTaiBox['business_id'].unique()
TaiTaiBox_long, TaiTaiBox_lat = _first_coordinates(TaiTaiBox)

buid3 = Sammy['business_id'].unique()
Sammy_long, Sammy_lat = _first_coordinates(Sammy)

Come = restaurants[restaurants['name'] == "Come and Get It"]
buid4 = Come['business_id'].unique()
Come_long, Come_lat = _first_coordinates(Come)
import folium
from folium import plugins
from folium.plugins import HeatMap
Let's plot these restaurants in a map.
# Map centred on Toronto.
lat_T = 43.651070
lon_T = -79.347015
map_T = folium.Map([lat_T, lon_T], zoom_start=10)

# One (latitudes, longitudes, colour) layer per chain, drawn in this order.
for lats, lons, colour in (
    (TaiTaiBox_lat, TaiTaiBox_long, 'red'),
    (Sammy_lat, Sammy_long, 'yellow'),
    (Come_lat, Come_long, 'pink'),
    (BanhMiBoys_lat, BanhMiBoys_long, 'blue'),
):
    for y, x in zip(lats, lons):
        folium.CircleMarker([y, x], radius=3, color=colour).add_to(map_T)
map_T

# Static screenshot of the map, loaded from a fixed local path.
from IPython.display import Image
Image("C:/Users/beuzitlouis/Documents/DTU/Cours/Second semester/Advanced Business Analytics/Final project/Yelp/Toronto/MapToronto1.JPG")
The file Business-Metropolis.xlsx can be obtained by running the notebook "DescripitveAnalysis Yelp Dataset" available at: https://github.com/hecmesge/ABA2020
# Load the business table; the path is relative to the current working directory.
Business=pd.read_excel('Business-Metropolis.xlsx')
On this file, we will consider all of the restaurants near Toronto.
Business.head()

# Same category filter as for the review table: drop rows without a category
# string, then keep the rows whose categories mention "Restaurants".
# NOTE: this rebinds `restaurants`, which previously held the rows derived
# from reviews_rest_Toronto.csv.
Business = Business.loc[Business['categories'].notnull()]
restaurants = Business.loc[Business['categories'].str.contains('Restaurants')]
restaurants.head()

# Keep the rows whose metropolis is Toronto.
restaurants = restaurants.loc[restaurants['metropolis'].eq('Toronto')]
print(len(restaurants))
We keep only the restaurants located no more than 20 kilometers from Toronto.
# Rows whose 'distance metropolis' is at most 20, renumbered 0..n-1.
restaurants_Toronto = restaurants.loc[
    restaurants['distance metropolis'].le(20)
].reset_index(drop=True)
print(len(restaurants_Toronto))

# Coordinate columns reused by the map and clustering code.
long, lat = restaurants_Toronto['longitude'], restaurants_Toronto['latitude']
We plot all of these restaurants:
# Map centred on Toronto.
lat_T = 43.651070
lon_T = -79.347015
map_T = folium.Map([lat_T, lon_T], zoom_start=10)

# Background layer: every point of `lat` / `long` as a small black dot.
for y, x in zip(lat, long):
    folium.CircleMarker([y, x], radius=1, color='black').add_to(map_T)

# Chain layers on top, one colour per chain, drawn in this order.
for lats, lons, colour in (
    (TaiTaiBox_lat, TaiTaiBox_long, 'red'),
    (Sammy_lat, Sammy_long, 'yellow'),
    (Come_lat, Come_long, 'pink'),
    (BanhMiBoys_lat, BanhMiBoys_long, 'blue'),
):
    for y, x in zip(lats, lons):
        folium.CircleMarker([y, x], radius=3, color=colour).add_to(map_T)
map_T

# Static screenshot of the map, loaded from a fixed local path.
Image("C:/Users/beuzitlouis/Documents/DTU/Cours/Second semester/Advanced Business Analytics/Final project/Yelp/Toronto/MapToronto2.JPG")
This map shows the Banh Mi Boys restaurants (in blue), the main competitors of Banh Mi Boys (in pink, yellow and red) and the restaurants in the city of Toronto (in black). We take the location of these restaurants as a proxy for the demand in the area (principle of equilibrium).
from sklearn.cluster import KMeans

# One [longitude, latitude] pair per restaurant, in row order.
points = [[x, y] for x, y in zip(long, lat)]

# Scatter plot of the restaurants (longitude on x, latitude on y).
plt.scatter(long, lat, s=2)
We have the four restaurants of Banh Mi Boys and four main competitors. If we were to open a fifth Banh Mi Boys restaurant, that is to say a ninth restaurant among all of the restaurants we have chosen, where would we put this new restaurant?
Let's cluster all of the restaurants in Toronto. We assume that restaurants are located where there is demand, so the more restaurants there are in an area, the more customers there are.
Finally, the idea is to analyze where Banh Mi Boys could open a new restaurant in order to win market share where its competitors are not already present.
# Cluster the [longitude, latitude] points into 9 groups.
# random_state is fixed so that the centres, and therefore the map below,
# are the same on every run; unseeded KMeans starts from random centres.
km = KMeans(n_clusters=9, random_state=0)
km.fit(points)
plt.scatter(km.cluster_centers_[:, 0], km.cluster_centers_[:, 1]);
len(km.cluster_centers_)

# Map centred on Toronto.
lat_T = 43.651070
lon_T = -79.347015
map_T = folium.Map([lat_T, lon_T], zoom_start=10)

# Background layer: every point of `lat` / `long` as a small black dot.
for y, x in zip(lat, long):
    folium.CircleMarker([y, x], radius=1, color='black').add_to(map_T)

# Chain layers, one colour per chain, drawn in this order.
for lats, lons, colour in (
    (TaiTaiBox_lat, TaiTaiBox_long, 'red'),
    (Sammy_lat, Sammy_long, 'yellow'),
    (Come_lat, Come_long, 'pink'),
    (BanhMiBoys_lat, BanhMiBoys_long, 'blue'),
):
    for y, x in zip(lats, lons):
        folium.CircleMarker([y, x], radius=3, color=colour).add_to(map_T)

# Cluster centres: column 0 is the longitude and column 1 the latitude
# (the order used when `points` was built), while folium expects [lat, lon].
for centre_lon, centre_lat in km.cluster_centers_:
    folium.CircleMarker([centre_lat, centre_lon], radius=3, color='cyan').add_to(map_T)
map_T

# Static screenshot of the map, loaded from a fixed local path.
Image("C:/Users/beuzitlouis/Documents/DTU/Cours/Second semester/Advanced Business Analytics/Final project/Yelp/Toronto/MapToronto3.JPG")
This map shows the Banh Mi Boys restaurants (in blue), the main competitors of Banh Mi Boys (in pink, yellow and red), the restaurants in the city of Toronto (in black) and the cluster centres of the demand for restaurants (in cyan).
All in all, Banh Mi Boys could take the opportunity to open a new restaurant in the east or in the west of Toronto, where neither Banh Mi Boys nor its competitors are present.
This would also be relevant if, for example, Banh Mi Boys wanted to improve its delivery system in order to expand its area of influence into the two areas mentioned above.